/****************************************************************************
// Title: 	what_happens_to_text.do 
// Where? 	"D:/Files/Shasta County/Aria_Scratch/Do"
// by: 		Aria		
// Purpose: This do file reads input and output Trumpia csv and excel files.
			It then produces the text.eps figure.
				
------------------------------------------------------------------------------
TOC
------------------------------------------------------------------------------
// Part 0: 		Setting up the environment
// Part 1: 		Reading and cleaning *Output for Trumpia*
// Part 2:		RCT Round & Date pairs
// Part 3:  	Reading and cleaning *Input from Trumpia* Distribution files
// Part 4:  	Reading and cleaning *Input from Trumpia* Add_ files
				
				-Note: if you uncomment these following lines within this part, 
				we get 3 more obs in Distribution and 3 fewer obs in Contact Related
				// drop if strpos(reason_phone, "duplicated")>0
				// drop if strpos(reason_phone, "dow")>0
				
// Part 5:  	Reading and cleaning *Input from Trumpia* Failed files
// Part 6:  	Merge
// Part 7: 		Graph: What Happens to Text 

*****************************************************************************/

*	****************************************************************************
**#	Part 0: Setting up the environment
*	****************************************************************************
set 	more off, perm
global 	dir 	"D:/Files/Shasta County"

global 	data 	"D:/Files/Shasta County/CarlyWill_Scratch/Data/" 			// 	Change
cap 	mkdir 	"${data}"

global 	temp 	"D:/Files/Shasta County/Aria_Scratch/_temp/Aria_2023" 			// 	Change
cap 	mkdir 	"${temp}"

set 	scheme s1mono

graph 	set window 	fontface Times 
graph 	set eps 	fontface Times

*	****************************************************************************
**#	Part 1: Reading and cleaning *Output for Trumpia*
*	****************************************************************************

cd "${dir}/Output for Trumpia/output_for_trumpia_clean"

/* List file names. */
local filelist: dir . files "*.csv"
di `filelist'

	local m = 1

foreach f of local filelist {

	import delimited using "`f'", stringcols(_all) varname(1) clear
	
		gen 		court_dateD = subinstr( "`f'",".csv","",.)
		replace 	court_dateD = subinstr(court_dateD, "trumpia","",.)
		destring 	court_dateD, replace

		tempfile save`m'
		qui save "`save`m''"
		
	local m `++m'
}

local 	obs = `m'-1
dis 	`obs'

qui use "`save1'", clear
forv i=2/`obs' {
	qui append using "`save`i''"
}

/* missing phone number? */
gen 	missing_phone = phone == "0"
la var 	missing_phone "Missing Phone Number" 

la var 	court_dateD "Court Date"
la var 	court_time 	"Court Time"
la var 	court_dept 	"Court Dept"
la var 	phone 		"Phone Number"
la var 	dow 		"Court Day of Week"
la var 	month 		"Court Month"
la var 	day 		"Court Day of Month"
la var 	year 		"Court Year"
la var 	trumpia_id 	"Trumpia ID (constructed)"

foreach v of varlist _all {
	capture confirm string variable `v'
	if !_rc {
		qui replace `v' = strlower(`v')
		qui replace `v' = ltrim(`v')
		qui replace `v' = rtrim(`v')
	}
}

egen 	rct_round = group(court_dateD)
la var 	rct_round "RCT Round"

order court_dateD rct_round trumpia_id 

save "${data}/trumpia_input.dta", replace
										
*	************************************************************************
**#	Part 2:	RCT Round & Date pairs
*	************************************************************************

use "${data}/trumpia_input.dta", clear

	keep 		court_dateD rct_round
	
	gen 		court_dateS = string(court_dateD, "%td") 
	
	tostring 	rct_round 	, gen(rct_roundS)
	
	duplicates 	drop 
	
save "${data}/round_date.dta", replace 

*	************************************************************************
**#	Part 3:	Trumpia Distribution (Sent)
*	************************************************************************

cd "${dir}/Input from Trumpia/input_from_trumpia_clean"

local filelist: dir . files "Distribution*"
di `filelist'

local m = 1

foreach f of local filelist {

	qui import delimited using "`f'", stringcols(_all) varname(1) clear
	
		/* List is RCT round that need cleaning */
		keep list mobile
		
		/* phone numnber */
		rename 	mobile phone
		replace phone = subinstr(phone, "+1 ", "", .)
		
		/* Labeling */
		la var phone "Phone Numnber" 
		
		replace list 		= "1st RCT Text" if strpos(list, "First")>0
		gen 	fst_space 	= strpos(list, " ")
		gen 	rct_round 	= substr(list, 1, fst_space)
		drop 	fst_space
		drop 	list
		
		local xs " "th" "nd" "rd" "st" "
		foreach x of local xs {
			qui replace rct_round = subinstr(rct_round, "`x'", "",.)
		}
		
		destring 	rct_round 	, replace
		replace 	rct_round 	= (rct_round-1) if rct_round > 16
		
		order 		rct_round 
		tostring 	rct_round, gen(rct_roundS)
		compress
			
		tempfile save`m'
		qui save "`save`m''"
	local m `++m'
}

local obs = `m'-1
	dis `obs'

qui use "`save1'", clear
forv i=2/`obs' {
	qui append using "`save`i''"
}

merge m:1 rct_roundS using "${data}/round_date.dta", keep(match master) nogen	
	
save "${data}/trumpia_dist.dta", replace
										
*	************************************************************************
**#	Part 4:	Trumpia Add_ (Contact Related)
*	************************************************************************

cd "${dir}/Input from Trumpia/input_from_trumpia_clean"

local filelist: dir . files "Add_*"
di `filelist'

	local m = 1

foreach f of local filelist {

	qui import delimited using "`f'", stringcols(_all) varname(1) clear
	
		gen 		_source 	= subinstr( "`f'",".csv","",.)
		gen 		court_dateD = regexs(0) if(regexm(_source, "[0-9][0-9][0-9][0-9][0-9]$"))
		destring 	court_dateD	, replace
		gen 		court_dateS = string(court_dateD, "%td")
		drop _source
		
		tempfile save`m'
		qui save "`save`m''"
		
	local m `++m'
}

local 	obs 	= `m'-1	
dis 	`obs'

qui use "`save1'", clear
forv i=2/`obs' {
	qui append using "`save`i''"
}


rename v10 reason_phone

foreach v of varlist v9 v11 {
	replace reason_phone = reason_phone + `v' if strpos(`v', "[phone]")>0
	drop 	`v' 
}
	

replace reason_phone = invalidmessage if missing(reason_phone)

// drop if strpos(reason_phone, "duplicated")>0 								//////// HERE
// drop if strpos(reason_phone, "dow")>0

gen 	reason_catP ="" 
replace reason_catP ="Blocked (Contact)" 		if strpos(reason_phone,"blocked")>0
replace reason_catP ="Invalid Length (Contact)" if strpos(reason_phone,"length")>0
la var  reason_catP	"Fail Reason Category"

gen 	contact_related_nonzero = phone != "0"
la var 	contact_related_nonzero "Non-Missing Contact Fail"

la var court_dateD 	"Court Date"
la var phone 		"Phone Number"
la var trumpia_id 	"Trumpia ID (constructed)"

keep court_dateD court_dateS trumpia_id phone reason_catP contact_related_nonzero 

qui compress

merge m:1 court_dateS using "${data}/round_date.dta", keep(match master) nogen

save "${data}/trumpia_add.dta", replace

*************************************************************************
**#	Part 5: Trumpia Failed (Carrier Related)
*************************************************************************
cd "${dir}/Input from Trumpia/input_from_trumpia_clean"

local filelist: dir . files "Failed*"
di `filelist'

	local m = 1

foreach f of local filelist {

	qui import excel "`f'", sheet("Sheet2") firstrow allstring clear
	
		gen 		_source 	= subinstr( "`f'",".xlsx","",.)
		replace 	_source 	= subinstr(_source, "failedtexts", "",.)
		
		gen 		day 		= regexs(0) if regexm(_source, "[0-9]*$")
		destring 	day 		, replace
		
		gen 		mdate 		= regexs(0) if regexm(_source, "^[a-z]+")
		gen 		month 		= month(date(mdate, "M"))
		drop 		mdate
		
		gen 		year 		= 2021
		
		gen 		court_dateD = mdy(month, day, year)
		replace 	court_dateD = court_dateD + 3
		replace 	court_dateD = (court_dateD-1) if _source == "jun9"
		replace 	court_dateD = (court_dateD-2) if _source == "may24"
		gen			court_dateS = string(court_dateD, "%td")
		
		drop 		if regexm(Number, "[0-9]") == 0
		drop 		if Number == "0"
		
		foreach var of varlist _all {
			capture assert missing(`var')
			if !_rc {
				qui drop `var'
			}
		}
		
		drop 	Name
		rename 	Number phone 
		
		tempfile save`m'
		qui save "`save`m''"
	local m `++m'
}

local obs = `m'-1
	dis `obs'

qui use "`save1'", clear
forv i=2/`obs' {
	qui append using "`save`i''"
}

rename FailReason reason_carrier 
la var reason_carrier "Failed Reason Carrier-Related"

gen 	reason_catC ="" 
replace reason_catC ="Will Retry (Carrier)" 		if strpos(reason_carrier, "retried")>0
replace reason_catC ="Illegal or no SMS (Carrier)" 	if strpos(reason_carrier, "For example")>0
replace reason_catC ="Anti-Spam Program (Carrier)" 	if strpos(reason_carrier, "anti-spam")>0
replace reason_catC ="No Text Support (Carrier)" 	if strpos(reason_carrier, "does not support text")>0
la var 	reason_catC	"Fail Reason Category"
	
keep 	phone reason_catC court_dateD court_dateS
compress 

merge m:1 court_dateS using "${data}/round_date.dta", keep(match master) nogen
	
save "${data}/trumpia_fail.dta", replace

*************************************************************************
**#	Part 6: Merge
*************************************************************************
cd "${data}"

global mvar_11 		court_dateD rct_round phone trumpia_id
global mvar_m1 		court_dateD rct_round phone

use "trumpia_input.dta", clear

	merge 		1:1 ${mvar_11} using "trumpia_add.dta"
		gen 	contact_related = _merge == 3
		la var 	contact_related "Contact-Related Fail"
		replace contact_related_nonzero = 0 if missing(contact_related_nonzero) // NOTE
		drop 	_merge
	
	merge 		m:1 ${mvar_m1} using "trumpia_fail.dta"
		gen 	carrier_related = _merge == 3
		la var 	carrier_related "Carrier-Related Fail"
		drop 	_merge	
	
	merge 		m:1 ${mvar_m1} using "trumpia_dist.dta"
		gen 	distribution = _merge == 3
		la var 	distribution "Record in Distribution Files"
		drop 	_merge		
	
gen 	fail_source =""
replace fail_source ="Contact" if contact_related == 1
replace fail_source ="Carrier" if carrier_related == 1
la var 	fail_source "Fail Source"

egen 	reason_cat = concat(reason_catP reason_catC)
la var 	reason_cat "Fail Reason Category"
 
local  v  "court_dateD court_time court_dept dow month day year rct_round trumpia_id phone missing_phone reason_cat fail_source contact_related contact_related_nonzero carrier_related distribution "
order `v' 

*************************************************************************
**#	Part 7: Graph: What Happens to Text 
*************************************************************************

// flag problematic rounds: 
gen 	flag = 1 if inlist(rct_round, 2,17,23,24)
la var 	flag "Incomplete Trumpia Export"

// drop problematic rounds: 
drop if flag == 1

// if a record is in both "distribution" and "contract related fail":
replace distribution = 0 if contact_related_nonzero == 1


destring trumpia_id, force replace
format court_dateD %td
merge 1:1 trumpia_id court_dateD using "D:\Files\Shasta County\CarlyWill_Scratch\Data\missing_help_cw" 

keep if _merge==3
drop _merge
/*
*************************************************************************
**#	Part 8: Add FTA Rates
*************************************************************************
merge 1:m person_key court_dateD  using "${data}/OutcomeCourtDatesAndHistory_v0.dta", force
keep if _merge==3

gen rctfta = fta == "Y"
by person_key court_dateD , sort: egen fta_any = max(rctfta)
by person_key court_dateD , sort: keep if _n == 1

*we lost a few here... not sure why
g net_fta=0
	replace net_fta=1 if distribution==1  & carrier_related==0
g stop=0
	replace stop=1 if  missing_phone==0  & distribution==0 & contact_related_nonzero==0  

 sum fta_any if stop==1
/* 

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
     fta_any |         22    .3181818    .4767313          0          1

. sum fta_any if net_fta==1

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
     fta_any |        437    .4416476    .4971525          0          1
**/
*/
//	------------------
// 	Figure prep
//	------------------

// 		obs counter for collapse
gen 	records = 1

// 		non-missing phone records:
gen 	records_valid 	= (records - missing_phone)
la var 	records 		"Court Records"
la var 	missing_phone 	"Court Records Missing Phone #"
la var 	records_valid 	"Court Records Non-Missing Phone #"

la var 	contact_related "Contact Related Fail (_add)"
la var 	contact_related_nonzero  "Contact Related Fail Non-Missing Phone #(_add)"
la var 	carrier_related "Carrier-Related Fail (_fail)"
la var 	distribution 	"Record in Distribution Files"


collapse ///
	(sum) 	records missing_phone records_valid contact_related contact_related_nonzero ///
			distribution  carrier_related , by(homeless_flag)
			
// 		distribution_net = texts that are acrually sent without ant problem:
gen 	distribution_net = (distribution - carrier_related)
replace distribution_net = 0 if distribution == 0 
la var 	distribution_net "Texts Successfully Sent (_dist - _fail)"

// 		Remaining: People (most prob) texted back "stop":
gen 	stop = records_valid - distribution - contact_related_nonzero
la var 	stop "Replied STOP"


preserve
keep if homeless_flag=="T"
//	--------------------------------------
// 	Figure (unhoused) 
//	--------------------------------------

local vars 		///
				distribution_net 	///
				carrier_related 	///
				stop 				///
				contact_related

// 	Create label (% of each category):
foreach i of local vars { 
	local `i'_lbl : display %3.0f  (`i'/records)*100
}


graph bar `vars', /// 
	stack text(	25 50 "`distribution_net_lbl'%" "Message Sent to Phone" ///
				85 50 "`carrier_related_lbl'%" "Carrier Failure" ///
				113 50 "`stop_lbl'% Replied STOP" ///
				145 50 "`contact_related_lbl'%" "Missing Phone Number", ///
				color(white)) ///
	legend(off) ytitle(Number of Texts)
	

graph save 		"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309_UH" 	, replace
graph export 	"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309_UH.eps"	, replace
graph export 	"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309_UH.pdf"	, replace

restore

preserve
keep if homeless_flag=="F"
//	--------------------------------------
// 	Figure (housed) 
//	--------------------------------------

local vars 		///
				distribution_net 	///
				carrier_related 	///
				stop 				///
				contact_related

// 	Create label (% of each category):
foreach i of local vars { 
	local `i'_lbl : display %3.0f  (`i'/records)*100
}


graph bar `vars', /// 
	stack text(	200 50 "`distribution_net_lbl'%" "Message Sent to Phone" ///
				475 50 "`carrier_related_lbl'%" "Carrier Failure" ///
				587 50 "`stop_lbl'% Replied STOP" ///
				660 50 "`contact_related_lbl'%" "Missing Phone Number", ///
				color(white)) ///
	legend(off) ytitle(Number of Texts)
	

graph save 		"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309_H" 	, replace
graph export 	"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309_H.eps"	, replace
graph export 	"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309_H.pdf"	, replace

restore


collapse ///
	(sum) 	records missing_phone records_valid contact_related contact_related_nonzero ///
			distribution_net  carrier_related stop

//	--------------------------------------
// 	Figure (All - both housed & unhoused) 
//	--------------------------------------

local vars 		///
				distribution_net 	///
				carrier_related 	///
				stop 				///
				contact_related

// 	Create label (% of each category):
foreach i of local vars { 
	local `i'_lbl : display %3.0f  (`i'/records)*100
}


graph bar `vars', /// 
	stack text(	222 50 "`distribution_net_lbl'%" "Message Sent to Phone" ///
				564 50 "`carrier_related_lbl'%" "Carrier Failure" ///
				700 50 "`stop_lbl'% Replied STOP" ///
				800 50 "`contact_related_lbl'%" "Missing Phone Number", ///
				color(white)) ///
	legend(off) ytitle(Number of Texts)
	

graph save 		"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309" 	, replace
graph export 	"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309.eps"	, replace
graph export 	"D:/Files/Shasta County/CarlyWill_Scratch/Output/texts_20230309.pdf"	, replace








	
	
	
	
	
	
	
	